
/*
 * GPU kernel to calculate the sum and the sum of squares of the input data.
 *
 * Parameters:
 *   out_sum  - output buffer; presumably receives the computed sum(s)
 *   out_sum2 - output buffer; presumably receives the computed sum(s) of squares
 *   in       - input data to be reduced
 *   n, m     - integer size parameters.
 *              NOTE(review): presumably the dimensions of `in` (e.g. rows and
 *              columns) -- the kernel definition is not visible from this
 *              declaration; confirm which is which and whether the reduction
 *              is global or per row/column.
 *
 * NOTE(review): the expected grid/block layout and any shared-memory
 * requirement are not stated here -- confirm against the kernel definition
 * before choosing a launch configuration.
 */
__global__ void sum_sum2_kernel(float *out_sum, float *out_sum2, float *in, int n, int m);

/*
 * GPU kernel to subtract vector B from vector A.
 * Works only with a 1D grid.
 *
 * Parameters:
 *   A    - first vector (the one B is subtracted from)
 *   B    - second vector (the one being subtracted)
 *   size - NOTE(review): presumably the number of elements in each vector --
 *          confirm against the kernel definition.
 *
 * NOTE(review): there is no separate output parameter, so the result is
 * presumably written back into A (A = A - B) -- confirm against the kernel
 * definition.
 */
__global__ void sub_kernel(float *A, float *B, int size);

